'''
@author : xaing
@time : 2018/4/2 19:31
@地址 : 广州
@作用 : 爬取91的视频,爬取到91视频并将视频保存到数据库中
@结果 :
'''


import requests
import re
import json
import sqliteCRUD as crud


def getPage(url):
    '''
    获取页码
    :param url: 网址
    :return:
    '''
    headers = {'Accept-Language': 'zh-CN,zh;q=0.9', 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'}
    response = requests.get(url, headers=headers)
    response.encoding = "utf-8"
    html = re.findall('<img\ssrc=".*?_(\d+).jpg".*?title="(.*?)".*?href="(.*?)".*?时长:</span>(.*?)\\n.*?添加时间:</span>(.*?)<br.*?收藏:</span> (\d+)', response.text, re.S)
    return html

def get_json(items):
    '''
    解析json数据
    :param items:
    :return:
    '''
    for item in items:
        yield {
            'index': item[0],
            'title': item[1],
            'long': item[3],
            'date': item[4],
            'collection': item[5],
            'href': item[2]
        }


def write_file(item):
    '''
    写入数据
    :param item:
    :return:
    '''
    with open('E:\\下载\\91.txt', 'a') as f:
        try:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')
        except Exception as e:
            print(e)
        f.close()

def main(page):
    url = 'http://91.91p31.space/v.php?category=mf&viewtype=basic&page=' + page
    print('完成：第', page, '页')
    items = getPage(url)
    for item in get_json(items):
        write_file(item)


# if __name__ == '__main__':
#     for page in range(0, 1):
#         main(str(page))





# 创建数据库 已经创建  table91
# crud.create91SQLDB()
for p in range(0, 4324):
    page = str(p)
    url = 'http://91.91p31.space/v.php?category=mf&viewtype=basic&page=' + page
    print('开始爬取第', page, '页······')
    # 获取到数据
    items = getPage(url)
    for item in get_json(items):
        print(item)
        # 解析字典
        index = item['index']
        title = item['title']
        long = item['long']
        date = item['date']
        collection = item['collection']
        href = item['href']
        # 拼接insert语句 插入到数据库中
        insertSql = '''INSERT INTO table91 (id, title, duration,year, collection, href) VALUES(?, ?, ?, ?, ?, ?)'''
        dataDB = [(index, title, long, date, collection, href)]
        conn = crud.getConnection()
        crud.executeSQL(conn, insertSql, dataDB)